In [1]:
!pip install pyforest
Defaulting to user installation because normal site-packages is not writeable
Requirement already satisfied: pyforest in c:\users\admin\appdata\roaming\python\python311\site-packages (1.1.0)
In [2]:
df = pd.read_excel("IPL.xlsx")
In [3]:
df
Out[3]:
Sl.NO. PLAYER NAME AGE COUNTRY TEAM PLAYING ROLE T-RUNS T-WKTS ODI-RUNS-S ODI-SR-B ODI-WKTS ODI-SR-BL CAPTAINCY EXP RUNS-S HS AVE SR-B SIXERS RUNS-C WKTS AVE-BL ECON SR-BL AUCTION YEAR BASE PRICE SOLD PRICE
0 1 Abdulla, YA 2 SA KXIP Allrounder 0 0 0 0.00 0 0.0 0 0 0 0.00 0.00 0 307 15 20.47 8.90 13.93 2009 50000 50000
1 2 Abdur Razzak 2 BAN RCB Bowler 214 18 657 71.41 185 37.6 0 0 0 0.00 0.00 0 29 0 0.00 14.50 0.00 2008 50000 50000
2 3 Agarkar, AB 2 IND KKR Bowler 571 58 1269 80.62 288 32.9 0 167 39 18.56 121.01 5 1059 29 36.52 8.81 24.90 2008 200000 350000
3 4 Ashwin, R 1 IND CSK Bowler 284 31 241 84.56 51 36.8 0 58 11 5.80 76.32 0 1125 49 22.96 6.23 22.14 2011 100000 850000
4 5 Badrinath, S 2 IND CSK Batsman 63 0 79 45.93 0 0.0 0 1317 71 32.93 120.71 28 0 0 0.00 0.00 0.00 2011 100000 800000
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
125 126 Yadav, AS 2 IND DC Batsman 0 0 0 0.00 0 0.0 0 49 16 9.80 125.64 2 0 0 0.00 0.00 0.00 2010 50000 750000
126 127 Younis Khan 2 PAK RR Batsman 6398 7 6814 75.78 3 86.6 1 3 3 3.00 42.85 0 0 0 0.00 0.00 0.00 2008 225000 225000
127 128 Yuvraj Singh 2 IND KXIP+ Batsman 1775 9 8051 87.58 109 44.3 1 1237 66 26.32 131.88 67 569 23 24.74 7.02 21.13 2011 400000 1800000
128 129 Zaheer Khan 2 IND MI+ Bowler 1114 288 790 73.55 278 35.4 0 99 23 9.90 91.67 1 1783 65 27.43 7.75 21.26 2008 200000 450000
129 130 Zoysa, DNT 2 SL DC Bowler 288 64 343 95.81 108 39.4 0 11 10 11.00 122.22 0 99 2 49.50 9.00 33.00 2008 100000 110000

130 rows × 26 columns

In [6]:
df.describe(include='all').T
Out[6]:
count unique top freq mean std min 25% 50% 75% max
Sl.NO. 130.0 NaN NaN NaN 65.5 37.671829 1.0 33.25 65.5 97.75 130.0
PLAYER NAME 130 130 Abdulla, YA 1 NaN NaN NaN NaN NaN NaN NaN
AGE 130.0 NaN NaN NaN 2.092308 0.576627 1.0 2.0 2.0 2.0 3.0
COUNTRY 130 10 IND 53 NaN NaN NaN NaN NaN NaN NaN
TEAM 130 17 CSK 14 NaN NaN NaN NaN NaN NaN NaN
PLAYING ROLE 130 4 Bowler 44 NaN NaN NaN NaN NaN NaN NaN
T-RUNS 130.0 NaN NaN NaN 2166.715385 3305.646757 0.0 25.5 542.5 3002.25 15470.0
T-WKTS 130.0 NaN NaN NaN 66.530769 142.676855 0.0 0.0 7.0 47.5 800.0
ODI-RUNS-S 130.0 NaN NaN NaN 2508.738462 3582.205625 0.0 73.25 835.0 3523.5 18426.0
ODI-SR-B 130.0 NaN NaN NaN 71.164385 25.89844 0.0 65.65 78.225 86.79 116.66
ODI-WKTS 130.0 NaN NaN NaN 76.076923 111.20507 0.0 0.0 18.5 106.0 534.0
ODI-SR-BL 130.0 NaN NaN NaN 34.033846 26.751749 0.0 0.0 36.6 45.325 150.0
CAPTAINCY EXP 130.0 NaN NaN NaN 0.315385 0.466466 0.0 0.0 0.0 1.0 1.0
RUNS-S 130.0 NaN NaN NaN 514.246154 615.226335 0.0 39.0 172.0 925.25 2254.0
HS 130.0 NaN NaN NaN 47.430769 36.403624 0.0 16.0 35.5 73.75 158.0
AVE 130.0 NaN NaN NaN 18.719308 11.094224 0.0 9.825 18.635 27.8725 50.11
SR-B 130.0 NaN NaN NaN 111.053462 35.928907 0.0 98.2375 118.51 129.1025 235.49
SIXERS 130.0 NaN NaN NaN 17.692308 23.828146 0.0 1.0 6.0 29.75 129.0
RUNS-C 130.0 NaN NaN NaN 475.523077 558.314049 0.0 0.0 297.0 689.25 1975.0
WKTS 130.0 NaN NaN NaN 17.169231 21.816763 0.0 0.0 8.5 23.75 83.0
AVE-BL 130.0 NaN NaN NaN 23.110231 20.802057 0.0 0.0 24.785 35.58 126.3
ECON 130.0 NaN NaN NaN 6.204462 4.941531 0.0 0.0 7.38 8.2475 38.11
SR-BL 130.0 NaN NaN NaN 17.382615 15.273422 0.0 0.0 19.935 26.2125 100.2
AUCTION YEAR 130.0 NaN NaN NaN 2009.092308 1.377821 2008.0 2008.0 2008.0 2011.0 2011.0
BASE PRICE 130.0 NaN NaN NaN 192230.769231 153097.300897 20000.0 100000.0 200000.0 225000.0 1350000.0
SOLD PRICE 130.0 NaN NaN NaN 521223.076923 406807.351419 20000.0 225000.0 437500.0 700000.0 1800000.0
In [9]:
df.dtypes.to_frame()
Out[9]:
0
Sl.NO. int64
PLAYER NAME object
AGE int64
COUNTRY object
TEAM object
PLAYING ROLE object
T-RUNS int64
T-WKTS int64
ODI-RUNS-S int64
ODI-SR-B float64
ODI-WKTS int64
ODI-SR-BL float64
CAPTAINCY EXP int64
RUNS-S int64
HS int64
AVE float64
SR-B float64
SIXERS int64
RUNS-C int64
WKTS int64
AVE-BL float64
ECON float64
SR-BL float64
AUCTION YEAR int64
BASE PRICE int64
SOLD PRICE int64
In [10]:
df.size
Out[10]:
3380
In [11]:
df.shape
Out[11]:
(130, 26)
In [12]:
df.isnull().sum()
Out[12]:
Sl.NO.           0
PLAYER NAME      0
AGE              0
COUNTRY          0
TEAM             0
PLAYING ROLE     0
T-RUNS           0
T-WKTS           0
ODI-RUNS-S       0
ODI-SR-B         0
ODI-WKTS         0
ODI-SR-BL        0
CAPTAINCY EXP    0
RUNS-S           0
HS               0
AVE              0
SR-B             0
SIXERS           0
RUNS-C           0
WKTS             0
AVE-BL           0
ECON             0
SR-BL            0
AUCTION YEAR     0
BASE PRICE       0
SOLD PRICE       0
dtype: int64

1. Plot the following charts (as shown in below fig.,) and mention any two inferences.¶

In [14]:
import matplotlib.pyplot as plt
import seaborn as sn
In [19]:
plt.figure(figsize=(30,30))
sn.boxplot(data=df)
Out[19]:
<Axes: >
In [20]:
sn.boxplot(x="COUNTRY", y="SOLD PRICE", data=df)
Out[20]:
<Axes: xlabel='COUNTRY', ylabel='SOLD PRICE'>
In [152]:
bar = sn.countplot(x='AGE' , data = df, hue='PLAYING ROLE', palette='turbo_r', )
for container in bar.containers:
    bar.bar_label(container)
plt.show()

2. Draw a Donut chart for any categorical Variable.¶

  • Distribution of IPL Players by Country
In [116]:
count = df['COUNTRY'].value_counts()
In [117]:
count
Out[117]:
COUNTRY
IND    53
AUS    22
SA     16
SL     12
PAK     9
NZ      7
WI      6
ENG     3
BAN     1
ZIM     1
Name: count, dtype: int64
In [118]:
lable = df['COUNTRY'].value_counts().index
In [119]:
lable
Out[119]:
Index(['IND', 'AUS', 'SA', 'SL', 'PAK', 'NZ', 'WI', 'ENG', 'BAN', 'ZIM'], dtype='object', name='COUNTRY')
In [150]:
plt.figure(figsize=(25,25))
plt.pie(count, labels=lable, autopct='%.2f%%', startangle=90 , textprops={'fontsize': 14})

centre_circle=plt.Circle((0,0),0.70, fc='white')
fig = plt.gcf()
fig.gca().add_artist(centre_circle)

plt.axis('equal')
plt.show()

3. It has been observed that one/few outliers are present in the variable ‘SOLD PRICE’.¶

  • Draw a box plot for ‘SOLD PRICE’ with outliers & find the value of IQR, Upper limit & lower limit.
  • Identify & Remove outliers with the help of Z-Score.
In [31]:
Q1=df['SOLD PRICE'].quantile(0.25)
Q2=df['SOLD PRICE'].quantile(0.50)
Q3=df['SOLD PRICE'].quantile(0.75)
Q4=df['SOLD PRICE'].quantile(1)
In [32]:
print(Q1)
print(Q2)
print(Q3)
print(Q4)
225000.0
437500.0
700000.0
1800000.0
In [33]:
IQR = Q3-Q1
In [34]:
print(IQR)
475000.0
In [36]:
UW= Q3+(1.5*IQR)
LW = Q1-(1.5*IQR)
In [37]:
print(UW)
print(LW)
1412500.0
-487500.0
In [53]:
out_liers = df[(df["SOLD PRICE"] > UW) | (df["SOLD PRICE"] < LW)]
In [54]:
out_liers
Out[54]:
Sl.NO. PLAYER NAME AGE COUNTRY TEAM PLAYING ROLE T-RUNS T-WKTS ODI-RUNS-S ODI-SR-B ODI-WKTS ODI-SR-BL CAPTAINCY EXP RUNS-S HS AVE SR-B SIXERS RUNS-C WKTS AVE-BL ECON SR-BL AUCTION YEAR BASE PRICE SOLD PRICE
15 16 Dhoni, MS 2 IND CSK W. Keeper 3509 0 6773 88.19 1 12.0 1 1782 70 37.13 136.45 64 0 0 0.00 0.00 0.00 2008 400000 1500000
23 24 Flintoff, A 2 ENG CSK Allrounder 3845 226 3394 88.82 169 33.2 1 62 24 31.00 116.98 2 105 2 52.50 9.55 33.00 2009 950000 1550000
50 51 Kohli, V 1 IND RCB Batsman 491 0 3590 86.31 2 137.0 1 1639 73 28.26 119.29 49 345 4 86.25 8.84 58.50 2011 150000 1800000
83 84 Pietersen, KP 2 ENG RCB+ Batsman 6654 5 4184 86.76 7 57.1 1 634 103 42.27 141.20 30 215 7 30.71 7.41 24.86 2009 1350000 1550000
93 94 Sehwag, V 2 IND DD Batsman 8178 40 8090 104.68 95 45.4 1 1879 119 30.31 167.32 79 226 6 37.67 10.56 21.67 2011 400000 1800000
111 112 Tendulkar, SR 3 IND MI Batsman 15470 45 18426 86.23 154 52.2 1 2047 100 37.91 119.22 24 58 0 0.00 9.67 0.00 2011 400000 1800000
113 114 Tiwary, SS 1 IND MI+ Batsman 0 0 49 87.50 0 0.0 0 836 42 25.33 119.60 32 0 0 0.00 0.00 0.00 2011 100000 1600000
127 128 Yuvraj Singh 2 IND KXIP+ Batsman 1775 9 8051 87.58 109 44.3 1 1237 66 26.32 131.88 67 569 23 24.74 7.02 21.13 2011 400000 1800000
In [40]:
sn.boxplot(df["SOLD PRICE"])
Out[40]:
<Axes: >
In [41]:
import plotly.express as p
In [43]:
p.box(df['SOLD PRICE'])
In [44]:
z_scores = stats.zscore(df['SOLD PRICE'])
In [45]:
z_scores
Out[45]:
0     -1.162826
1     -1.162826
2     -0.422523
3      0.811315
4      0.687931
         ...   
125    0.564547
126   -0.730982
127    3.155606
128   -0.175755
129   -1.014765
Name: SOLD PRICE, Length: 130, dtype: float64
In [46]:
outliers = df[(z_scores > 3) | (z_scores < -3)]
In [47]:
outliers
Out[47]:
Sl.NO. PLAYER NAME AGE COUNTRY TEAM PLAYING ROLE T-RUNS T-WKTS ODI-RUNS-S ODI-SR-B ODI-WKTS ODI-SR-BL CAPTAINCY EXP RUNS-S HS AVE SR-B SIXERS RUNS-C WKTS AVE-BL ECON SR-BL AUCTION YEAR BASE PRICE SOLD PRICE
50 51 Kohli, V 1 IND RCB Batsman 491 0 3590 86.31 2 137.0 1 1639 73 28.26 119.29 49 345 4 86.25 8.84 58.50 2011 150000 1800000
93 94 Sehwag, V 2 IND DD Batsman 8178 40 8090 104.68 95 45.4 1 1879 119 30.31 167.32 79 226 6 37.67 10.56 21.67 2011 400000 1800000
111 112 Tendulkar, SR 3 IND MI Batsman 15470 45 18426 86.23 154 52.2 1 2047 100 37.91 119.22 24 58 0 0.00 9.67 0.00 2011 400000 1800000
127 128 Yuvraj Singh 2 IND KXIP+ Batsman 1775 9 8051 87.58 109 44.3 1 1237 66 26.32 131.88 67 569 23 24.74 7.02 21.13 2011 400000 1800000
In [49]:
cleaned_data = df[(z_scores <=3) & (z_scores>=-3)]
In [50]:
cleaned_data
Out[50]:
Sl.NO. PLAYER NAME AGE COUNTRY TEAM PLAYING ROLE T-RUNS T-WKTS ODI-RUNS-S ODI-SR-B ODI-WKTS ODI-SR-BL CAPTAINCY EXP RUNS-S HS AVE SR-B SIXERS RUNS-C WKTS AVE-BL ECON SR-BL AUCTION YEAR BASE PRICE SOLD PRICE
0 1 Abdulla, YA 2 SA KXIP Allrounder 0 0 0 0.00 0 0.0 0 0 0 0.00 0.00 0 307 15 20.47 8.90 13.93 2009 50000 50000
1 2 Abdur Razzak 2 BAN RCB Bowler 214 18 657 71.41 185 37.6 0 0 0 0.00 0.00 0 29 0 0.00 14.50 0.00 2008 50000 50000
2 3 Agarkar, AB 2 IND KKR Bowler 571 58 1269 80.62 288 32.9 0 167 39 18.56 121.01 5 1059 29 36.52 8.81 24.90 2008 200000 350000
3 4 Ashwin, R 1 IND CSK Bowler 284 31 241 84.56 51 36.8 0 58 11 5.80 76.32 0 1125 49 22.96 6.23 22.14 2011 100000 850000
4 5 Badrinath, S 2 IND CSK Batsman 63 0 79 45.93 0 0.0 0 1317 71 32.93 120.71 28 0 0 0.00 0.00 0.00 2011 100000 800000
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
124 125 White, CL 2 AUS RCB+ Batsman 146 5 2037 80.48 12 27.5 1 745 78 31.04 132.09 29 70 0 0.00 14.00 0.00 2008 100000 500000
125 126 Yadav, AS 2 IND DC Batsman 0 0 0 0.00 0 0.0 0 49 16 9.80 125.64 2 0 0 0.00 0.00 0.00 2010 50000 750000
126 127 Younis Khan 2 PAK RR Batsman 6398 7 6814 75.78 3 86.6 1 3 3 3.00 42.85 0 0 0 0.00 0.00 0.00 2008 225000 225000
128 129 Zaheer Khan 2 IND MI+ Bowler 1114 288 790 73.55 278 35.4 0 99 23 9.90 91.67 1 1783 65 27.43 7.75 21.26 2008 200000 450000
129 130 Zoysa, DNT 2 SL DC Bowler 288 64 343 95.81 108 39.4 0 11 10 11.00 122.22 0 99 2 49.50 9.00 33.00 2008 100000 110000

126 rows × 26 columns

4. Assume that we would like to filter certain records such as the players who have hit more than 80 sixers in the IPL tournament.¶

  • Write a code to display the data frame with only the player name & sixers, for the same.
In [99]:
Players_name = df['PLAYER NAME']
Sixers = df['SIXERS']
In [100]:
Data = { 'Player_name': Players_name, 'Sixers':Sixers}
In [101]:
D = pd.DataFrame(Data)
In [102]:
D
Out[102]:
Player_name Sixers
0 Abdulla, YA 0
1 Abdur Razzak 0
2 Agarkar, AB 5
3 Ashwin, R 0
4 Badrinath, S 28
... ... ...
125 Yadav, AS 2
126 Younis Khan 0
127 Yuvraj Singh 67
128 Zaheer Khan 1
129 Zoysa, DNT 0

130 rows × 2 columns

In [106]:
Player_hit_more_than_80_sixers = D[(D["Player_name"] == 'Player_name') | (D["Sixers"] > 80)]
In [107]:
Player_hit_more_than_80_sixers
Out[107]:
Player_name Sixers
26 Gayle, CH 129
28 Gilchrist, AC 86
82 Pathan, YK 81
88 Raina, SK 97
97 Sharma, RG 82